# Import the regular expression library - re
import re
# Sample sentence to search in. Adjacent string literals inside parentheses
# are joined automatically, so no backslash continuation is needed.
text = (
    "Search is the target. let's search the Search using "
    "search which can be accessed by re's search"
)

# re.search scans the whole string and returns a match object for the first
# occurrence of the pattern, or None when the pattern does not occur.
search_result = re.search('Search', text)
if search_result:
    print("re's search was successfull in searching the Search!")
    # Uncomment to inspect the match object itself:
    # print(search_result)
re's search was successfull in searching the Search!
# re.search for searching a simple pattern (without any special characters)
text = [
    'Regular expressions can be used for advanced text pattern matching, extraction, and/or search-and-replace functionality.',
    'Shortened as regex.',
    'Regex is a sequence of characters that define a search pattern.',
    'Regular expressions are used in :',
    'Search engines',
    'Search and replace dialogs of word processors and text editors',
    'Text processing utilities such as sed and AWK',
    'Lexical analysis Regular',
    'Registr',
]

# The search is case-sensitive by default, so only lines containing the
# capitalised word 'Search' are printed.
for line in text:
    if re.search('Search', line):
        print('Line -', line)
Line - Search engines Line - Search and replace dialogs of word processors and text editors
Ignore case while searching: the I flag stands for Ignore case!
# re.I (short for re.IGNORECASE) makes the search case-insensitive, so lines
# containing either 'search' or 'Search' are printed.
for line in text:
    if re.search('Search', line, re.I):
        print('Line -', line)
Line - Regular expressions can be used for advanced text pattern matching, extraction, and/or search-and-replace functionality. Line - Regex is a sequence of characters that define a search pattern. Line - Search engines Line - Search and replace dialogs of word processors and text editors
Search for any line that starts with Reg, followed by any 3 characters, followed by r
# Manual (non-regex) check: print every line that starts with 'Reg' and whose
# 7th character (index 6) is 'r'.
for line in text:
    # The length guard must be `> 6`: a line of exactly 6 characters has no
    # index 6, so a `>= 6` guard would let line[6] raise IndexError.
    if line.startswith('Reg') and len(line) > 6 and line[6] == 'r':
        print(line)
Regular expressions can be used for advanced text pattern matching, extraction, and/or search-and-replace functionality. Regular expressions are used in : Registr
regex way!
# Search for any line that starts with Reg, followed by any 3 characters, followed by r
# '^' anchors the pattern at the start of the line and each '.' stands for
# exactly one arbitrary character (other than a newline), so the pattern
# reads: 'Reg', then any 3 characters, then 'r'.
for line in text:
    if re.search('^Reg...r', line):
        print(line)
Regular expressions can be used for advanced text pattern matching, extraction, and/or search-and-replace functionality. Regular expressions are used in : Registr
# re.match only tries to match at the beginning of the string. '.' accepts
# any single character, so ".at" matches "mat".
if re.match(".at", "mat"):
    print("Matched")
else:
    print("Not matched")
Matched
# '$' anchors the pattern at the end of the string: "tent" is one arbitrary
# character followed by "ent" and then the end of the string, so it matches.
if re.match(".ent$", "tent"):
    print("Matched")
else:
    print("Not Matched")
Matched
# Same pattern against "tents": after ".ent" there is still an 's' before the
# end of the string, so the '$' anchor fails and nothing matches.
if re.match(".ent$", "tents"):
    print("Matched")
else:
    print("Not Matched")
Not Matched
# Validate a register number typed by the user. The pattern spells out every
# position: a digit 1-9, a digit 0-9, three letters, then four digits
# (for example 20bce0000). '^' and '$' force the whole input to fit.
register = input()
if re.match("^[1-9][0-9][a-zA-Z][a-zA-Z][a-zA-Z][0-9][0-9][0-9][0-9]$", register):
    print("Valid")
else:
    print("Invalid")
20bce0000 Valid
# We can also write the same expression more compactly: {3} and {4} repeat
# the preceding character class exactly 3 and 4 times respectively.
register = input()
if re.match("^[1-9][0-9][a-zA-Z]{3}[0-9]{4}$", register):
    print("Valid")
else:
    print("Invalid")
20bce0000 Valid
| Symbol | Description |
|---|---|
| literal | Match literal string value literal |
| re1 \| re2 | Match re1 or re2 |
| ? | matches zero or one of the preceding group. |
| * | matches zero or more of the preceding group. |
| + | matches one or more of the preceding group. |
| {n} | matches exactly n of the preceding group. |
| {n,} | matches n or more of the preceding group. |
| {,m} | matches 0 to m of the preceding group. |
| {n,m} | matches at least n and at most m of the preceding group. |
| ^spam | means the string must begin with spam. |
| spam$ | means the string must end with spam. |
| Period (.) | matches any character, except newline characters. |
| \d, \w, and \s | match a digit, word, or space character, respectively. |
| \D, \W, and \S | match anything except a digit, word, or space character, respectively. |
| [abc] | matches any character between the brackets (such as a, b, or c). |
| [^abc] | matches any character that isn’t between the brackets. |
Now go back to the register-number validator program; it should be clear to you now.
# '.*' means "zero or more of any character", so it matches whatever the user
# types, including an empty line.
my_string = input("Enter your input ")
if re.match('.*', my_string):
    print("Matched the input string")
Enter your input Bhargavi Matched the input string
# Decide whether the typed text is an integer literal.
# Pattern: an optional leading '-', then either a lone 0 or a non-zero digit
# followed by any number of digits (so leading zeros such as 007 are rejected).
# '-' has no special meaning outside a character class, so it needs no
# backslash; the raw string keeps the pattern free of string-escape surprises.
number = input('Enter input ')
if re.match(r"^-?(0|[1-9][0-9]*)$", number):
    print('Integer')
else:
    print('Not an integer')
Enter input 1345 Integer
# Extract everything that looks like an e-mail address: one or more
# non-whitespace characters on each side of '@'. The trailing '@2PM' is not
# picked up because nothing but a space precedes its '@'.
# The pattern is a raw string so that '\S' reaches the regex engine untouched
# instead of being treated as an (invalid) string escape sequence.
text = 'mail from abc@gmail.com to xyz@vit.ac.in about meeting @2PM'
mailid = re.findall(r'\S+@\S+', text)
print(mailid)
['abc@gmail.com', 'xyz@vit.ac.in']
# Search and extract all the contact numbers from the customers details
# Use '()' to group the pattern which has to be extracted
customer_details = [
    'name1 name1@abc.com contact number: 1234567891 someotherinfo',
    'This does not have contact info',
    'name2 abc@xyx.co.in contact number: 9940693362 additional info',
    'my_name aaa123@abc.com contact number: 8765432567 another number contact number: 2345678912',
]

for item in customer_details:
    # The leading and trailing '.*' are greedy and swallow the whole line, so
    # findall yields at most one match per line: the LAST contact number.
    contact_numbers = re.findall('.*contact number: ([1-9][0-9]{9}).*', item)
    if contact_numbers:
        print(contact_numbers)
['1234567891'] ['9940693362'] ['2345678912']
# Search and extract all the contact numbers from the customers details
# Use '()' to group the pattern which has to be extracted
customer_details = [
    'name1 name1@abc.com contact number: 1234567891 someotherinfo',
    'This does not have contact info',
    'name2 abc@xyx.co.in contact number: 9940693362 additional info',
    'my_name aaa123@abc.com contact number: 8765432567 another number contact number: 2345678912',
]

for item in customer_details:
    # Without a surrounding '.*' each match covers only one
    # 'contact number: ...' occurrence, so findall returns every number found
    # on the line rather than just one.
    contact_numbers = re.findall('contact number: ([1-9][0-9]{9})', item)
    if contact_numbers:
        print(contact_numbers)
['1234567891'] ['9940693362'] ['8765432567', '2345678912']